* Builds the national industry-by-firm-size employment growth file.
* Steps: load the national 2-digit employment CSV, keep geographies observed
* from 2000 through 2017, collapse to industry-year-firmsize totals, merge firm
* sizes 1-3 into one category, compute log growth and share changes over spans
* of 1, 3, 5 and 15 years, export a CSV for Figure 2, and save the result
* reshaped wide by firm size. Nothing is rebuilt if the .dta is already found.
local Data "NationalIndustryFirmGrowth2digit"

noi disp "Obtaining national employment growth by firm-size size data..."
noi disp "Verifying that `Data' does not already exist..."
* confirm file returns code 601 when the file is not found; capture stores
* that code in _rc instead of aborting the do-file.
* NOTE(review): this check looks in the current working directory, while the
* save below writes to $LocalData. Confirm the two are the same folder,
* otherwise the data are rebuilt on every run.
capture confirm file "`Data'.dta"

* Return code 601 means the file was not found, so build it.
if _rc == 601 {

	noi disp "Data not found.  Building data..."
	noi disp "Loading National 2 digit employment.csv..."
	* Obtain national employment by industry and firm size.
	insheet using "$SourceData\National 2 digit employment.csv", clear
	noi disp "Finished loading."

	* Exclude the finance industry and NAICS 92, the latter because there is
	* no data for large v. small.
	drop if industry=="52" | industry=="92"

	* Verify that there are no duplicates.
	isid geography industry year firmsize

	* Keep only geographies whose first observed year is 2000.
	bysort geography: egen minyear = min(year)
	keep if minyear==2000
	drop minyear
	* Keep only geographies whose last observed year is 2017.
	bysort geography: egen maxyear = max(year)
	keep if maxyear==2017
	drop maxyear

	* Sum employment over geographies.
	sort industry year firmsize
	collapse (sum) emp, by(industry year firmsize)
	* Industry-year total over every firm size; computed before any size is
	* dropped so that the shares below use the full total as denominator.
	* total() is the documented name of the egen function formerly called sum().
	bysort industry year: egen totemp = total(emp)
	label var emp "Total employment for the industry, year, and firm size"

	* Combine the lowest three employment categories into one.
	gen Firm123 = emp if firmsize==1 | firmsize==2 | firmsize==3
	* Total over firm sizes 1, 2 and 3 within each industry-year.
	bysort industry year: egen TotalFirm123 = total(Firm123)
	drop Firm123
	* The firmsize 1 row now carries the combined total for sizes 1, 2 and 3.
	replace emp = TotalFirm123 if firmsize==1
	drop TotalFirm123
	* Keep only two sizes: the combined 1-2-3 category and size 5.
	keep if firmsize==1 | firmsize==5

	gen sizeshare = emp/totemp
	label var sizeshare "Share for a size, industry, and year"

	sort industry firmsize year

	foreach span in 1 3 5 15 {
		* Log change in employment relative to the row span observations back.
		* The if-condition requires that row to be exactly span years earlier,
		* so a missing year yields a missing value instead of a difference
		* taken over the wrong horizon.
		by industry firmsize : gen g_nat_ind_size`span'_ = ln(emp) - ln(emp[_n-`span']) if year[_n-`span'] == year - `span'
		* Label the full variable name (with trailing underscore) rather than
		* relying on variable-name abbreviation.
		label var g_nat_ind_size`span'_ "Growth in national industry employment, by size"

		* Change in the employment share over the same horizon.
		by industry firmsize : gen d_nat_ind_share`span'_ = sizeshare - sizeshare[_n-`span'] if year[_n-`span'] == year - `span'
		label var d_nat_ind_share`span'_ "Change in national industry employment share, by size"
	}

	* First observation by industry and firm size. Given the sort order this
	* is the earliest year present in each group.
	sort industry firmsize year
	by industry firmsize : gen sizeshare1_2000 = sizeshare[1]
	gen dn_i_sizeshare1_t_00 = sizeshare - sizeshare1_2000

	* First row within industry (lowest firmsize, earliest year) supplies the
	* base-period industry total.
	by industry : gen emp_2000 = totemp[1]
	gen n_i_gr_t_00 = (totemp - emp_2000)/emp_2000

	* Figure 2: export the full long-format data; preserve and restore leave
	* the data in memory untouched.
	preserve
		*keep if industry == "44-45" | industry=="31-33" | industry=="61"
		export delimited "$LocalData\Results\US_Industry_SmallShare.csv", replace
	restore

	* Keep only what is needed, and reshape to match diagnostic code format.
	* The trailing underscore in the stubs separates the span from the
	* firmsize suffix that reshape appends.
	keep industry firmsize year g_nat_ind_size* d_nat_ind_share*
	sort industry year
	reshape wide g_nat_ind_size* d_nat_ind_share*, i(industry year) j(firmsize)

	noi disp "Saving `Data'.dta"
	compress
	save "$LocalData\\`Data'.dta", replace
	* NOTE(review): the local CurrentDate is not defined in the visible code.
	* If it is empty here the archive copy is saved without a date suffix.
	* Confirm that the caller sets it.
	save "$LocalData\Archive\\`Data'`CurrentDate'.dta", replace
}
* This runs if no error was returned.
else noi disp "Data already exists."
noi etime
noi disp " "